import torch
import timeit
# Length of the vectors used in the benchmark.
n = 100000000

# CPU operands: a 1 x n row vector and an n x 1 column vector.
cpu_a = torch.randn(1, n)
cpu_b = torch.randn(n, 1)

print(n, cpu_a.device, cpu_b.device)

# GPU operands with the same shapes: sampled on the host, then moved to CUDA.
gpu_a = torch.randn(1, n).to('cuda')
gpu_b = torch.randn(n, 1).to('cuda')

print(n, gpu_a.device, gpu_b.device)

def cpu_run():
    """Return the matrix product of the module-level ``cpu_a`` and ``cpu_b``."""
    return cpu_a.matmul(cpu_b)

def gpu_run():
    """Multiply the module-level ``gpu_a`` by ``gpu_b`` and wait for the result.

    CUDA kernels are launched asynchronously: ``torch.matmul`` returns as soon
    as the work has been queued. Without an explicit synchronization, a
    wall-clock timer wrapped around this function would mostly measure launch
    overhead instead of the computation itself.

    Returns:
        The tensor produced by ``torch.matmul(gpu_a, gpu_b)``.
    """
    c = torch.matmul(gpu_a, gpu_b)
    # Block until all queued work on the result's device has finished, so the
    # caller's timing covers the actual matmul.
    torch.cuda.synchronize(c.device)
    return c

# Warm-up pass: call each benchmark function 50 times and record the total
# elapsed seconds reported by timeit.
# NOTE(review): CUDA work is queued asynchronously, so unless gpu_run itself
# synchronizes with the device, gpu_time may not include the full compute time.
cpu_time = timeit.Timer(cpu_run).timeit(number=50)
gpu_time = timeit.Timer(gpu_run).timeit(number=50)

print('warmup:', cpu_time, gpu_time)